In [1]:
from configparser import ConfigParser
from os.path import join
from os import pardir

Configurar las credenciales para acceder a la API de Twitter


In [2]:
# Load the Twitter API credentials from <parent dir>/src/credentials.ini.
# ConfigParser.read() silently skips files it cannot open and just returns the
# list of files it managed to parse, so check that list here instead of
# letting a missing file surface later as an opaque KeyError on 'twitter'.
config = ConfigParser()
credentials_path = join(pardir, 'src', 'credentials.ini')
loaded_files = config.read(credentials_path)
if not loaded_files:
    raise FileNotFoundError(
        'Could not read credentials file: {}'.format(credentials_path))
# Keep the list of parsed files as the cell's displayed value.
loaded_files


Out[2]:
['../src/credentials.ini']

In [3]:
# Read the four credential values from the [twitter] section of the parsed
# config; the section is looked up once and reused for every key.
twitter_credentials = config['twitter']
APP_KEY = twitter_credentials['app_key']
APP_SECRET = twitter_credentials['app_secret']
OAUTH_TOKEN = twitter_credentials['oauth_token']
OAUTH_TOKEN_SECRET = twitter_credentials['oauth_token_secret']

In [4]:
from twitter import oauth, Twitter, TwitterHTTPError

Esta es la molona librería que vamos a utilizar: https://github.com/sixohsix/twitter/tree/master


In [5]:
# Build the authenticator. Arguments are passed by keyword because the
# positional order is easy to get wrong: the user token pair comes first,
# the application (consumer) key pair second.
auth = oauth.OAuth(
    token=OAUTH_TOKEN,
    token_secret=OAUTH_TOKEN_SECRET,
    consumer_key=APP_KEY,
    consumer_secret=APP_SECRET,
)

# API client used by every request in the rest of the notebook.
twitter_api = Twitter(auth=auth)
# NOTE(review): presumably makes the library retry failed/rate-limited calls;
# confirm against the library documentation for the installed version.
twitter_api.retry = True

1. Recoger tweets a partir de un id


In [6]:
# Id of the status to fetch, kept as a string exactly as it is sent to the API.
TWEET_ID = '628949369883000832'
tweet = twitter_api.statuses.show(_id=TWEET_ID)

In [7]:
# Display the 'text' field of the fetched status.
tweet['text']


Out[7]:
"dear @Microsoft the newOoffice for Mac is great and all, but no Lync update? C'mon."

2. Recoger tweets de una usuaria


In [8]:
# Parameters for the statuses/user_timeline call: which account to read and
# the value passed as `count`.
timeline_params = {
    'screen_name': "femfreq",
    'count': 100,
}
femfreq_tweet_search = twitter_api.statuses.user_timeline(**timeline_params)

In [9]:
# Display the 'description' of the 'user' object attached to the first
# element of the timeline response.
femfreq_tweet_search[0]['user']['description']


Out[9]:
'Feminist Frequency is an educational nonprofit working for a more equitable media landscape and online world. Created by Anita Sarkeesian.'

In [10]:
# Display the 'text' field of the last element of the timeline response.
femfreq_tweet_search[-1]['text']


Out[10]:
"Time is running out! Just 4 days left to back Ordinary Women, it doesn't happen without you! https://t.co/gWbfnMQOcp https://t.co/3FJIs5Bozy"

3. Recoger tweets a partir de una consulta


In [11]:
# Parameters for the search/tweets call: the hashtag query and the value
# passed as `count`.
search_params = {
    'q': "#feminazi",
    'count': 100,
}
tweets = twitter_api.search.tweets(**search_params)

In [12]:
# Display the 'search_metadata' entry of the search response.
tweets['search_metadata']


Out[12]:
{'completed_in': 0.121,
 'count': 100,
 'max_id': 723548002791510017,
 'max_id_str': '723548002791510017',
 'next_results': '?max_id=722730474121084927&q=%23feminazi&count=100&include_entities=1',
 'query': '%23feminazi',
 'refresh_url': '?since_id=723548002791510017&q=%23feminazi&include_entities=1',
 'since_id': 0,
 'since_id_str': '0'}

In [13]:
import pandas as pd

# Collect the 'text' of every status in the search response.
text_gathered = [status['text'] for status in tweets['statuses']]
num_tweets = len(text_gathered)

# One row per collected text; every row starts with troll_tag set to False.
pd_tweets = pd.DataFrame({
    'tweet_text': text_gathered,
    'troll_tag': [False for _ in range(num_tweets)],
})

In [14]:
# Preview the first rows of the tweet table.
pd_tweets.head()


Out[14]:
troll_tag tweet_text
0 False @1800flowers Ads pay 4 Limbaugh to call us str...
1 False Stop the spread of cultural marxism. #Feminazi...
2 False #VivasNosQueremos\n#NantzinVive en mi corazón\...
3 False RT @jalgete: Todo es "violencia de género" y "...
4 False RT @jalgete: Todo es "violencia de género" y "...

In [15]:
# Write the tweet table to a CSV file; the relative path resolves against the
# notebook's current working directory.
output_csv = 'maybe_troll.csv'
pd_tweets.to_csv(output_csv)

In [16]:
ls


0. Gather data.ipynb  maybe_troll.csv